*************************************************************************************************************************************************************
*Load Data:
*************************************************************************************************************************************************************

*cd change directory to where downloaded data are saved
*Load the case-level data. The clear option discards whatever is currently in
*memory, so the script can be re-run in the same session without stopping on
*"no; data in memory would be lost".
use Data_case_level.dta, clear

*************************************************************************************************************************************************************
*Basic cleaning:
*************************************************************************************************************************************************************

*Remove cases disposed in 1994 and every case in the "homicide" offense group.
drop if disp_year == 1994 | ch_off_group == "homicide"

*************************************************************************************************************************************************************
*Defining the controls:
*************************************************************************************************************************************************************

*Defendant-level controls.
*Sex: codes "U" and "X" are blanked out so that those cases are dropped below.
replace sex = "" if inlist(sex, "U", "X")
generate female = (sex == "F")
*Race: same treatment of "U" and "X"; indicators for codes "B" and "H".
replace race = "" if inlist(race, "U", "X")
generate black = (race == "B")
generate hispanic = (race == "H")
*Age enters with a quadratic term.
generate age2 = age^2
*Cases with blank sex or race (including the recoded "U"/"X") are removed.
drop if race == "" | sex == ""
*Method of disposition: 1 for plea codes GL, GU, RL, RS and NC, 0 otherwise.
generate bargain = inlist(plea, "GL", "GU", "RL", "RS", "NC")
*Attorney type: indicators for codes "R" and "P"; every other non-blank code
*is the omitted category.
generate priv_attorney = (attorney == "R")
generate pub_defender = (attorney == "P")
*Cases with a blank attorney code are removed.
drop if attorney == ""

*Previous points classification:
*Level 1: 0 pts
*Level 2: 1-4 pts
*Level 3: 5-8 pts
*Level 4: 9-14 pts
*Level 5: 15-18 pts
*Level 6: 19+ pts
*How are points assigned?
*Class A felony: 10 pts
*Class B1 felony: 9 pts
*Class B2, C or D felony: 6 pts
*Class E, F or G felony: 4 pts
*Class H or I felony: 2 pts
*Class A1 and 1 misdemeanor / impaired driving misdemeanor: 1 pt
*
*One 0/1 indicator per level from 2 to 6; level 1 is the omitted category.
*Stata ranks every missing value (., .a, ..., .z) above all numbers. The upper
*bounds on levels 2-5 already exclude missing histpt, but the open-ended level 6
*needs an explicit guard; !missing() also covers the extended missing codes.
*Cases with missing histpt get 0 on all five indicators, so they cannot be told
*apart from level 1.
gen prev_hist_2 = (histpt > 0 & histpt <= 4)
gen prev_hist_3 = (histpt > 4 & histpt <= 8)
gen prev_hist_4 = (histpt > 8 & histpt <= 14)
gen prev_hist_5 = (histpt > 14 & histpt <= 18)
gen prev_hist_6 = (histpt > 18 & !missing(histpt))

*Indicator sets for offense class, offense group, county, judge, district and
*disposition year. Each "tabulate ..., generate(stub)" call creates one 0/1
*variable per category, named stub1, stub2, ... in sorted category order.
*Offense class:
tabulate ch_off_class, generate(ch_class)
*Offense group:
tabulate ch_off_group, generate(ch_group)
*Felony indicator: 1 for offense classes A, B1, B2, C, D, E, F, G, H and I.
*Two inlist() calls are used because inlist() accepts at most ten string arguments.
generate felony = inlist(ch_off_class, "A", "B1", "B2", "C", "D") | inlist(ch_off_class, "E", "F", "G", "H", "I")
*County number:
tabulate county_nb, generate(ct_nb)
*Judge:
tabulate judge, generate(ju)
*District:
tabulate district, generate(di)
*Disposition year:
*There is only one observation for 1994, so that year is removed before the
*year indicators are built.
drop if disp_year == 1994
tabulate disp_year, generate(year)

*Label every case with the name of the judge indicator that is switched on
*(ju1 ... ju172, as created by "tabulate judge"). Cases for which none of the
*172 indicators equals 1 keep the placeholder "***".
generate str6 judgecode = "***"
forvalues j = 1/172 {
	replace judgecode = "ju`j'" if ju`j' == 1
}

*Remove cases with offense class "-99", then cases whose disposition year
*falls outside the [year_in, year_out] window.
drop if ch_off_class == "-99"
drop if disp_year < year_in | disp_year > year_out

*Store the regression data set, sorted by judge code.
sort judgecode
save Data_for_regressions, replace

*************************************************************************************************************************************************************
*Defining the dependent variable and treatment:
*************************************************************************************************************************************************************

*Dependent variable: natural log of the sentence (missing when sentence <= 0).
gen log_sent = log(sentence)

*Law that changed elections from statewide to districtwide was ratified on 08/02/1996.
*Treatment for judges in activity when the law changed:
*Stata ranks missing values above every number, so an unguarded ">=" would code
*cases with a missing disposition date as treated. They are left missing instead
*and therefore drop out of every estimation that uses aft_treat.
gen aft_treat = (dispdt >= 19960802) if !missing(dispdt)

*"Bush" measures of electorate's preferences: (0.56463997 is the statewide share for Bush in 2000)
*dist_b is the Bush share minus the statewide share; cons_b / lib_b hold its
*absolute size on the conservative (positive) and liberal (negative) side.
gen dist_b = bush - .56463997
gen dist_b2 = dist_b^2
gen cons_b = 0
replace cons_b = dist_b if dist_b > 0
gen cons_b2 = 0
replace cons_b2 = dist_b2 if dist_b > 0
gen lib_b = 0
replace lib_b = abs(dist_b) if dist_b < 0
gen lib_b2 = 0
replace lib_b2 = dist_b2 if dist_b < 0

*String labels used by the figures. The same missing-value ordering would label
*cases without a Bush share as "Conservative districts", so both labels are
*blanked out when the variable they are built from is missing.
gen str22 electorate = "Liberal districts"
replace electorate = "Conservative districts" if bush > .56463997
replace electorate = "" if missing(bush)
gen str20 a_treat = "Before Bill 41"
replace a_treat = "Post Bill 41" if aft_treat == 1
replace a_treat = "" if missing(aft_treat)

*Treatment interacted with the (squared) distance on each side.
gen treat_cons = aft_treat*cons_b
gen treat_cons2 = aft_treat*cons_b2
gen treat_lib = aft_treat*lib_b
gen treat_lib2 = aft_treat*lib_b2

*************************************************************************************************************************************************************
*Main specifications: asymmetric effects
*************************************************************************************************************************************************************

*Cluster identifier: one group per combination of judge and aft_treat.
egen double_cluster = group(judge aft_treat)

*Controls and fixed effects shared by the three specifications.
local controls bargain age age2 female black hispanic priv_attorney pub_defender prev_hist_2 prev_hist_3 prev_hist_4 prev_hist_5 prev_hist_6 ct_nb2-ct_nb100 ch_class2-ch_class14 year2-year16 ju2-ju172

eststo clear
*(1) Linear terms on both sides.
eststo: quietly reg log_sent treat_cons treat_lib aft_treat `controls' if active == "A", vce(cluster double_cluster)
*(2) Linear and quadratic terms on both sides.
eststo: quietly reg log_sent treat_cons treat_cons2 treat_lib treat_lib2 aft_treat `controls' if active == "A", vce(cluster double_cluster)
*(3) Quadratic term on the conservative side only.
eststo: quietly reg log_sent treat_cons treat_cons2 treat_lib aft_treat `controls' if active == "A", vce(cluster double_cluster)
*Show the table on screen (with stars), then export it without stars; the
*county, offense-class, year and judge indicators are left out of both.
esttab, drop(ct_nb* ch_class* year* ju*) star(* 0.10 ** 0.05 *** 0.01) se ar2
esttab using main_table_1.csv, drop(ct_nb* ch_class* year* ju*) nostar se ar2 b(3) se(3) replace

*Descriptive statistics on the estimation sample of the most recent regression.
*e(sample) is 1 for observations used in that regression and 0 otherwise.
gen stats_sample = e(sample)
foreach v in bargain felony black hispanic female {
	tab `v' if stats_sample == 1
}
sum age if stats_sample == 1
bysort bargain: sum sentence if stats_sample == 1
tab aft_treat if stats_sample == 1

********************************************************************************
*Descriptive statistics by period: two-sample t tests comparing the groups
*defined by aft_treat.
foreach v in felony black hispanic female age {
	ttest `v' if stats_sample == 1, by(aft_treat)
}

*Same comparison restricted to cases with dispdt on or before 19980101:
foreach v in felony black hispanic female age {
	ttest `v' if stats_sample == 1 & dispdt <= 19980101, by(aft_treat)
}

********************************************************************************

*Split sample: one regression for cases with cons_b > 0 and one for cases with
*lib_b > 0, each using only its own side's treatment terms.
local ctrl bargain age age2 female black hispanic priv_attorney pub_defender prev_hist_2 prev_hist_3 prev_hist_4 prev_hist_5 prev_hist_6 ct_nb2-ct_nb100 ch_class2-ch_class14 year2-year16 ju2-ju172

eststo clear
*Conservative side (linear and quadratic term):
eststo: quietly reg log_sent treat_cons treat_cons2 aft_treat `ctrl' if active == "A" & cons_b > 0, vce(cluster double_cluster)
*Liberal side (linear term only):
eststo: quietly reg log_sent treat_lib aft_treat `ctrl' if active == "A" & lib_b > 0, vce(cluster double_cluster)
*On-screen table with stars, then a CSV export without stars.
esttab, drop(ct_nb* ch_class* year* ju*) star(* 0.10 ** 0.05 *** 0.01) se ar2
esttab using main_table_2.csv, drop(ct_nb* ch_class* year* ju*) nostar se ar2 b(3) se(3) replace


*************************************************************************************************************************************************************
*Figures:
*************************************************************************************************************************************************************

*Normalized sentence: position of the sentence inside the presumptive range,
*0 at pres_min and 1 at pres_max. pres_min and pres_max are divided by 30.5
*before being compared with sentence (presumably a days-to-months conversion;
*confirm against the data documentation). The result is missing when any input
*is missing or when pres_max equals pres_min.
gen norm_sent = (sentence - pres_min/30.5)/(pres_max/30.5 - pres_min/30.5)
*"censored" dependent variable: norm_sent clipped to the [0, 1] interval.
*Stata ranks missing values above every number, so "norm_sent > 1" is also true
*when norm_sent is missing; the !missing() guard keeps cens_sent missing there
*instead of setting it to 1.
gen cens_sent = norm_sent
replace cens_sent = 1 if norm_sent > 1 & !missing(norm_sent)
replace cens_sent = 0 if norm_sent < 0

*I didn't calculate presumptive sentences for cases after 12/01/09 (the rules change at that point), so I'll drop them.
*NOTE(review): this also drops cases with a missing chargedt, because missing
*compares greater than any number. Confirm that this is intended.
drop if chargedt > 20091201

*Cross section: CDF of the censored normalized sentence, conservative versus
*liberal districts, all cases with active == "A".
graph set eps logo off
distplot cens_sent if norm_sent ~= . & active == "A" , over (electorate) note(All cases from 1995 to 2009) caption(Cumulative distribution function - whole sample) xtitle(Normalized minimum sentence) ytitle("") legend(label(1 "Conservative districts") label(2 "Liberal districts")) graphregion(color(white))
graph export "Liberal_conservative_cross_section_CDF.eps", as(eps) replace
*Diff in diff: same CDFs, one panel per value of a_treat, restricted to judges
*with year_in <= 1996 and cases with disp_year <= 1997.
graph set eps logo off
distplot cens_sent if norm_sent ~= . & active == "A" & year_in <= 1996 & disp_year <= 1997, over (electorate) by (a_treat, note(Cases from 1995 to 1997 decided by judges serving at least since 1996) caption(Cumulative distribution function)) ytitle("") xtitle(Normalized minimum sentence) legend(label(1 "Conservative" "districts") label(2 "Liberal" "districts")) graphregion(color(white)) bgcolor(white)
graph export "Liberal_conservative_did_CDF.eps", as(eps) replace
*Four groups by Bush share, split at .4758449, .5537842 and .6298198.
*Stata ranks missing values above every number, so an unguarded
*"bush > .6298198" would put cases without a Bush share into group 4.
*They are left blank / missing instead so that they are not plotted as group 4.
gen str15 quartile = ""
replace quartile = "Group 1" if bush <= .4758449
replace quartile = "Group 2" if bush <= .5537842 & bush > .4758449
replace quartile = "Group 3" if bush <= .6298198 & bush > .5537842
replace quartile = "Group 4" if bush > .6298198 & !missing(bush)
gen quart_group = .
replace quart_group = 1 if bush <= .4758449
replace quart_group = 2 if bush <= .5537842 & bush > .4758449
replace quart_group = 3 if bush <= .6298198 & bush > .5537842
replace quart_group = 4 if bush > .6298198 & !missing(bush)
*Before/after CDFs within each of the four groups.
graph set eps logo off
distplot cens_sent if norm_sent ~= . & active == "A" & year_in <= 1996 & disp_year <= 1997, over (aft_treat) by (quartile, note(Cases from 1995 to 1997 decided by judges serving at least since 1996) caption(Cumulative distribution function)) ytitle("") xtitle(Normalized minimum sentence) legend(label(1 "Before Bill 41") label(2 "After Bill 41")) graphregion(color(white)) bgcolor(white)
graph export "Four_groups_did_CDF.eps", as(eps) replace
*Kolmogorov-Smirnov Test: equality of the cens_sent distributions across the
*two electorate groups, on the same restricted sample as the diff-in-diff figure.
ksmirnov cens_sent if active == "A" & norm_sent != . & year_in <= 1996 & disp_year <= 1997, by (electorate)

*************************************************************************************************************************************************************
*Turnover regressions:
*************************************************************************************************************************************************************

clear

*set mem 700000
*set more off

*Merging Judges.dta with pres_2000_votes.dta:
*The vote file is sorted by district and saved under a new name because the
*old-style merge below expects both data sets to be sorted on the key.
use "pres_2000_votes.dta", clear
sort district
save "pres_2000_votes_merge.dta", replace
use "Judges.dta"
*District codes are recoded before the merge (presumably to match the codes
*used in the vote file; confirm against pres_2000_votes.dta).
replace district = "7C" if district == "7BC"
replace district = "20A" if judge == "JMW"
replace district = "20B" if judge == "SCT"
sort district
merge district using "pres_2000_votes_merge.dta"
*Judges_extended.dta is written before the unmatched rows are removed, so it
*still contains _merge and every non-matching observation.
save "Judges_extended.dta", replace
drop if _merge ~= 3
drop _merge

*out_early indicates exit by the end of the last possible "first election" period for judges with year_in <= 1996.
*A missing year_out compares greater than 2003, so such judges get out_early = 0.
gen out_early = 0
replace out_early = 1 if year_out <= 2003

*Distance of the district's Bush share from the statewide share (0.56463997),
*split into its conservative (positive) and liberal (negative) part.
gen dist_b = bush - .56463997
gen a_dist_b = abs(dist_b)
gen cons_b = 0
replace cons_b = dist_b if dist_b > 0
gen lib_b = 0
replace lib_b = abs(dist_b) if dist_b < 0

*Probit of early exit on the two distance measures, for judges with year_in <= 1996.
*probit does not produce an adjusted R-squared, so the table reports the
*pseudo R-squared (pr2) instead of ar2.
eststo clear
eststo: quietly probit out_early lib_b cons_b if year_in <= 1996
esttab, star(* 0.10 ** 0.05 *** 0.01) se pr2

*************************************************************************************************************************************************************
*Making data for Matlab:
*************************************************************************************************************************************************************

clear
*set mem 700000
*set matsize 600
*set more off

*cd change directory to where downloaded data are saved
use Data_case_level.dta

*Same basic cleaning as for the regression sample.
drop if disp_year == 1994
drop if ch_off_group == "homicide"

*Post-law indicator. Stata ranks missing values above every number, so an
*unguarded ">=" would code cases with a missing disposition date as treated.
*They are left missing here and receive the -999 code below.
gen aft_treat = (dispdt >= 19960802) if !missing(dispdt)

*Plea indicator: 1 for plea codes GL, GU, RL, RS and NC.
gen bargain = 0
replace bargain = 1 if plea == "GL" | plea == "GU" | plea == "RL" | plea == "RS" | plea == "NC"

*jail = 1 when active == "A".
generate jail = 0
replace jail = 1 if active == "A"

*Four groups by Bush share. Cases without a Bush share stay missing (and get
*-999 below) instead of falling into group 4 through "bush > .6298198".
gen quart_group = .
replace quart_group = 1 if bush <= .4758449
replace quart_group = 2 if bush <= .5537842 & bush > .4758449
replace quart_group = 3 if bush <= .6298198 & bush > .5537842
replace quart_group = 4 if bush > .6298198 & !missing(bush)

*Keep only offense groups other than assault, sexual, kidnapping, homicide,
*property and drugs.
keep if ch_off_group != "assault" &  ch_off_group != "sexual" &  ch_off_group != "kidnapping" &  ch_off_group != "homicide" &  ch_off_group != "property" & ch_off_group != "drugs"

keep disp_year dismiss bargain jail sentence quart_group aft_treat

*Replacing missing values (will use -999 instead):
*NOTE(review): disp_year-quart_group is a range in data-set order, not in the
*order of the keep list; it covers all seven kept variables only if disp_year
*comes before dismiss and sentence in Data_case_level.dta. Confirm.
foreach vr of varlist disp_year-quart_group {
	replace `vr' = -999 if missing(`vr')
}

*Saving
outsheet disp_year-quart_group using "Data_for_MATLAB.csv", comma replace

